
library(MASS)
library(invgamma)

# load the training source code
source('Training code for EIV-SVT-Frechet.R')




#########################
### global parameters ###
#########################

# user-specified parameters
MC <- 100
n <- 100
p <- 50
N <- 1000

### global parameters

# orthonormal vectors
set.seed(100)
dummy_mat <- matrix(rnorm(p^2), nrow = p, ncol = p)
basisX <- eigen(t(dummy_mat) %*% dummy_mat)$vectors

# eigenvalues
covXeigen_seq <- sort(exp(seq(log(1), log(1e-3), length.out = p)), decreasing = TRUE)
covXeigen_seq <- covXeigen_seq / sum(covXeigen_seq) * p
sum(covXeigen_seq[1:(p/3)]) / sum(covXeigen_seq)

# covariance
SigmaX <- basisX %*% diag(covXeigen_seq) %*% t(basisX)
sort(diag(SigmaX))

# measurement error
SigmaE <- 5e-2

# regression parameter
alpha <- 1
beta <- rep(1/sqrt(p), p)

sd_Y <- 0.5
sd_Q1 <- (1 / 0.25)^2 + 2
sd_Q2 <- 1 * (sd_Q1 - 1)

# evaluation points in quantile
u <- seq(0, 1, length.out = 101)
u <- u[c(-1, -length(u))]

# fine grid for singular value threshold
lambda_seq <- sort(seq(0, sqrt(max(covXeigen_seq) * (p / n) * 0.75), length.out = 20))





##################
### PREDICTION ###
##################

MSPE_ref_MC <- rep(NA, MC)
MSPE_svt_MC <- matrix(NA, nrow = MC, ncol = length(lambda_seq))

for (mc in 1:MC) {
  
  set.seed(1 + (mc-1) * MC)
  
  cat('PREDICTION: MC iteration: ', mc, '\n')
  
  # generating a random sample
  sample_gen <- sample_gen_fun(n = n, N = N, p = p,
                               SigmaX = SigmaX, SigmaE = SigmaE, 
                               alpha = alpha, beta = beta, 
                               sd_Y = sd_Y, sd_Q1 = sd_Q1, sd_Q2 = sd_Q2)
  
  # training set
  X <- sample_gen$X
  Z <- sample_gen$Z
  Q <- sample_gen$Q
  Q0 <- sample_gen$Q0
  mu_Y <- apply(X, 1, mu)
  
  # test set
  X_new <- sample_gen$X_new
  Z_new <- sample_gen$Z_new
  Q_new <- sample_gen$Q_new
  Q0_new <- sample_gen$Q0_new
  mu_Y_new <- apply(X_new, 1, mu)
  
  # ref: MSPE
  fit_ref <- frechet_lambda_fun(lambda = 0,
                                X = X, Z = X, Q = Q, X_new = X_new, Q_new = Q_new)
  
  MSPE_ref_MC[mc] <- fit_ref$MSPE
  
  for (l in 1:length(lambda_seq)) {
    
    # svt: MSPE
    fit_svt <- frechet_lambda_fun(lambda = lambda_seq[l], 
                                  X = X, Z = Z, Q = Q, X_new = X_new, Q_new = Q_new)
    
    MSPE_svt_MC[mc, l] <- fit_svt$MSPE
    
  }

}

# optimal lambda
ind_lambda <- which.min(apply(MSPE_svt_MC, 2, mean))
lambda_opt <- lambda_seq[ind_lambda]



### Figure 1 (left)
mspe_col <- c('red', rep('white', length(lambda_seq)-1), 'black')
mspe_col[ind_lambda] <- 'blue'

par(mfrow = c(1,1))
par(mar=c(4.5,4.5,1,4.5)+0.1)
boxplot(cbind(MSPE_svt_MC, MSPE_ref_MC),
        cex.lab = 1.25, cex.axis = 1.15,
        cex = 0.25, outline = FALSE,
        ylab = 'MSPE',
        ylim = c(0.35, 0.95),
        col = mspe_col,
        xaxt = 'n')

tshd_label <- seq(1, length(lambda_seq), length.out = 7)

axis(1, tshd_label,
     cex.lab = 1.25, cex.axis = 1.15,
     labels = round(c(0,(lambda_seq)[tshd_label][-1]), 2),
     line = 0)
mtext('Threshold', 1, line = 3,
      cex = 1.25)
grid()
legend('topleft', c('REF', 'EIV', 'SVT'), cex = 1.25,
       fill = c('black', 'red', 'blue'), 
       bty = 'n')


### Table 1: MSPE
round(mean(MSPE_ref_MC), 3); round(mean(MSPE_svt_MC[,1]), 3); round(mean(MSPE_svt_MC[,ind_lambda]), 3)





#####################################
### ESTIMATION and REGRESSION FIT ###
#####################################

# evaluation grid for model estimation
set.seed(999)
x_eval <- mvrnorm(500, rep(0, p), Sigma = SigmaX)

mu_Y_eval <- apply(x_eval, 1, mu)
Q0_eval <- matrix(nrow = nrow(x_eval), ncol = length(u))
for (m in 1:nrow(x_eval)) {
  
  Q0_eval[m,] <- qnorm(u, mu_Y_eval[m], sd_Q2 / (sd_Q1 - 1))
  
}

MSE_ref_MC <- MSE_eiv_MC <- MSE_svt_MC <- rep(NA, MC)
est_ref_MC <- est_eiv_MC <- est_svt_MC <- array(NA, dim = c(MC, nrow(x_eval), length(u)))

for (mc in 1:MC) {
  
  set.seed(1 + (mc-1) * MC)
  
  cat('ESTIMATION: MC iteration: ', mc, '\n')
  
  # generating a random sample
  sample_gen <- sample_gen_fun(n = n, N = N, p = p,
                               SigmaX = SigmaX, SigmaE = SigmaE, 
                               alpha = alpha, beta = beta, 
                               sd_Y = sd_Y, sd_Q1 = sd_Q1, sd_Q2 = sd_Q2)
  
  # training set
  X <- sample_gen$X
  Z <- sample_gen$Z
  Q <- sample_gen$Q
  Q0 <- sample_gen$Q0
  mu_Y <- apply(X, 1, mu)
  
  ### REF
  # ref: MSE
  fit_ref <- frechet_lambda_fun(lambda = 0,
                                X = X, Z = X, Q = Q, X_new = X, Q_new = Q)
  
  MSE_ref_MC[mc] <- fit_ref$MSPE
  
  # ref: estimation
  est_ref <- frechet_lambda_fun(lambda = 0,
                                X = X, Z = X, Q = Q, X_new = x_eval)
  
  est_ref_MC[mc, , ] <- est_ref$Q_pred
  
  
  ### EIV
  # eiv: MSE
  fit_eiv <- frechet_lambda_fun(lambda = 0,
                                X = X, Z = Z, Q = Q, X_new = X, Q_new = Q)
  
  MSE_eiv_MC[mc] <- fit_eiv$MSPE
  
  # eiv: estimation
  est_eiv <- frechet_lambda_fun(lambda = 0, 
                                X = X, Z = Z, Q = Q, X_new = x_eval)
  
  est_eiv_MC[mc, , ] <- est_eiv$Q_pred
  
  
  ### SVT
  # svt: MSE
  fit_svt <- frechet_lambda_fun(lambda = lambda_opt,
                                X = X, Z = Z, Q = Q, X_new = X, Q_new = Q)
  
  MSE_svt_MC[mc] <- fit_svt$MSPE
  
  # svt: estimation
  est_svt <- frechet_lambda_fun(lambda = lambda_opt, 
                                X = X, Z = Z, Q = Q, X_new = x_eval)
  
  est_svt_MC[mc, , ] <- est_svt$Q_pred
  
}



### combining MC experiments

# ref: bias-variance 
mean_est_ref_MC <- apply(est_ref_MC, c(2,3), mean)
bias_sq_est_ref_MC <- apply((mean_est_ref_MC - Q0_eval)^2, 1, sum) * diff(u)[1]
variance_est_ref_MC <- rep(0, nrow(x_eval))
for (mc in 1:MC) {
  tmp <- apply((est_ref_MC[mc, , ] - mean_est_ref_MC)^2, 1, sum) * diff(u)[1]
  variance_est_ref_MC <- variance_est_ref_MC + tmp
}
variance_est_ref_MC <- variance_est_ref_MC / MC

# eiv: bias-variance 
mean_est_eiv_MC <- apply(est_eiv_MC, c(2,3), mean)
bias_sq_est_eiv_MC <- apply((mean_est_eiv_MC - Q0_eval)^2, 1, sum) * diff(u)[1]
variance_est_eiv_MC <- rep(0, nrow(x_eval))
for (mc in 1:MC) {
  tmp <- apply((est_eiv_MC[mc, , ] - mean_est_eiv_MC)^2, 1, sum) * diff(u)[1]
  variance_est_eiv_MC <- variance_est_eiv_MC + tmp
}
variance_est_eiv_MC <- variance_est_eiv_MC / MC

# svt: bias-variance 
mean_est_svt_MC <- apply(est_svt_MC, c(2,3), mean)
bias_sq_est_svt_MC <- apply((mean_est_svt_MC - Q0_eval)^2, 1, sum) * diff(u)[1]
variance_est_svt_MC <- rep(0, nrow(x_eval))
for (mc in 1:MC) {
  tmp <- apply((est_svt_MC[mc, , ] - mean_est_svt_MC)^2, 1, sum) * diff(u)[1]
  variance_est_svt_MC <- variance_est_svt_MC + tmp
}
variance_est_svt_MC <- variance_est_svt_MC / MC


bias_sq_est_all_MC <- cbind(bias_sq_est_ref_MC, bias_sq_est_eiv_MC, bias_sq_est_svt_MC)
variance_est_all_MC <- cbind(variance_est_ref_MC, variance_est_eiv_MC, variance_est_svt_MC)
MSE_all_MC <- cbind(MSE_ref_MC, MSE_eiv_MC, MSE_svt_MC)



### Figure 1 (right)
par(mfrow = c(1,3))

# bias
par(mar=c(4.5,4.5,1,1)+0.1)
boxplot(bias_sq_est_all_MC, outline = FALSE,
        ylab = 'Squared Bias',
        names = c('REF', 'EIV', 'SVT'),
        col = c('black', 'red', 'blue'))
grid()

# variance
par(mar=c(4.5,4.5,1,1)+0.1)
boxplot(variance_est_all_MC, outline = FALSE,
        ylab = 'Variance',
        names = c('REF', 'EIV', 'SVT'),
        col = c('black', 'red', 'blue'))
grid()

# MSE
par(mar=c(4.5,4.5,1,1)+0.1)
boxplot(MSE_all_MC, outline = FALSE,
        ylab = 'MSE',
        names = c('REF', 'EIV', 'SVT'),
        col = c('black', 'red', 'blue'))
grid()


### Table 1: Bias, variance, MSE
# bias
as.numeric(round(sqrt(apply(bias_sq_est_all_MC, 2, mean)), 3))

# variance
as.numeric(round(sqrt(apply(variance_est_all_MC, 2, mean)), 3))

# MSE
as.numeric(round(apply(MSE_all_MC, 2, mean), 3))

